# delimit;
clear all;
set seed 1;

*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*1) Generate Latent Data;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;

*****************************;
*Set Parameters;
*****************************;

*set # of obs;
set obs 100000;
*Mean of Initial Conditions (Skills and Income);
matrix mu = [ 0 \ 1 ];
*Variance/Covariance Matrix of Initial Conditions (Skills and Income);
matrix Sigma = [1 , 0.5 \ 0.5 , 1 ];

*Draw standard normal shocks;
gen sn1 = rnormal(0,1);
*re-center simulated shocks;
qui sum sn1;
replace sn1 = (sn1-r(mean))/r(sd);

gen sn2 = rnormal(0,1);
*re-center simulated shocks;
qui sum sn2;
replace sn2 = (sn2-r(mean))/r(sd);

*Cholesky Decomposition of Variance/Covariance Matrix;
matrix L = cholesky(Sigma);

*draw initial conditions (allowing correlation);
gen log_skills0 = mu[1,1] + L[1,1]*sn1;
gen log_income  = mu[2,1] + L[2,1]*sn1 + L[2,2]*sn2;

**************************;
**************************;
*Set Parameters;
**************************;
**************************;

*Investment equation parameters (2 periods);
*Order (columns) of paramaters wrt variables: skills, income, standard deviation of shocks (rows are periods);
*Important: the two share parameters of skills and income need to sum to 1 in each period (within each row);
matrix alpha = [ 0.5 , 0.5 , 1 \ 0.5 , 0.5 , 1 ];

*Technology (trans-log function with 2 periods);
*Order (columns) of paramaters wrt variables: TFP, skills, investment, skillsXinvestment, standard deviation of shocks (rows are periods);
matrix gamma = [ 2 , 0.5 , 0.5 , -0.1 , 1 \ 2 , 0.5 , 0.5 , -0.1 , 1 ];


forvalues t = 0(1)1{;
local ind_t = `t' + 1; 

*simulate shocks for investments;
gen shock_I`t' = rnormal(0,1);
*re-center simulated shocks;
qui sum shock_I`t';
replace shock_I`t' = (shock_I`t'-r(mean))/r(sd);

*simulate latent investments (here alpha[`ind_t',1] + alpha[`ind_t',2] = 1);
gen log_I`t' =  alpha[`ind_t',1]*log_skills`t' + alpha[`ind_t',2]*log_income + alpha[`ind_t',3]*shock_I`t';

*simulate shocks for skills;
gen shock_skills`t' = rnormal(0,1);
*re-center simulated shocks;
qui sum shock_skills`t';
replace shock_skills`t' = (shock_skills`t'-r(mean))/r(sd);

*simulate next-period latent skills;
gen log_skills`ind_t' = gamma[`ind_t',1] + gamma[`ind_t',2]*log_skills`t' +  gamma[`ind_t',3]*log_I`t' + gamma[`ind_t',4]*log_skills`t'*log_I`t' + gamma[`ind_t',5]*shock_skills`t';

};


*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*2) Generate Observed Data (with measurement error and unknown location and scale);
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;

*measurement locations;
matrix mu_I = [ 1, 2, 3 \ 1, 2, 3 ];
matrix mu_skills = [ 1, 2, 3 \ 1, 2, 3 \ 1, 2, 3 ];

*measurement scales (factor loadings);
matrix lambda_I = [ 1, 0.5, 0.3 \ 1, 0.5, 0.3 ];
matrix lambda_skills = [ 1, 0.5, 0.3 \ 1, 0.5, 0.3 \ 1, 0.5, 0.3 ];

*Measures for Investments (two periods);
forvalues t = 0(1)1{;
local ind_t = `t' + 1; 
*Three measures;
forvalues j = 1(1)3{;
gen Z`j'_I`t' = mu_I[`ind_t',`j'] + lambda_I[`ind_t',`j']*log_I`t' + rnormal(0,0.2);
};
};


*Measures for Skills (three periods);
forvalues t = 0(1)2{;
local ind_t = `t' + 1; 
*Three measures;
forvalues j = 1(1)3{;
gen Z`j'_skills`t' = mu_skills[`ind_t',`j'] + lambda_skills[`ind_t',`j']*log_skills`t' + rnormal(0,0.2);
};
};


*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*3) Estimate the Model;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;
*******************************************************************************;

**********************************;
*pre-define matrixes for estimates;
**********************************;

*Measurement parameters;
matrix mu_I_hat      =J(2,3,0);
matrix mu_skills_hat =J(3,3,0);
matrix lambda_I_hat      =J(2,3,1);
matrix lambda_skills_hat =J(3,3,1);

*Investment equation parameters;
matrix alpha_hat = J(2,3,0);

*Technological parameters;
matrix gamma_hat = J(2,5,0);


*Recover Initial Measurement Error Parameters for Skills: Location of the Measurement Model for Initial Skills (mu);
forvalues j = 1(1)3{;
qui sum Z`j'_skills0;
matrix mu_skills_hat[1,`j']=r(mean);
*Assuming age-invariance for all measures (this is not needed, just to simplify the exercise);
matrix mu_skills_hat[2,`j'] = mu_skills_hat[1,`j'];
matrix mu_skills_hat[3,`j'] = mu_skills_hat[1,`j'];

};

*Recover Initial Measurement Error Parameters for Skills: Scale (factor loadings) of the Measurement Model for Initial Skills (lambda);
forvalues j = 2(1)3{;
forvalues h = 2(1)3{;

if `j'!=`h'{;

qui corr Z`j'_skills0 Z`h'_skills0 , cov;
local cov_j_h = r(cov_12);

qui corr Z1_skills0 Z`h'_skills0 , cov;
local cov_1_h = r(cov_12);

matrix lambda_skills_hat[1,`j'] = `cov_j_h'/`cov_1_h';

*Assuming age-invariance for all measures (this is not needed, just to simplify the exercise);
matrix lambda_skills_hat[2,`j'] = lambda_skills_hat[1,`j'];
matrix lambda_skills_hat[3,`j'] = lambda_skills_hat[1,`j'];
};

};
};

******************************************;
*Generate residualized measures of skills;
******************************************;
forvalues t = 0(1)2{;
forvalues j = 1(1)3{;
gen Z`j'_skills`t'_tilde = (Z`j'_skills`t' - mu_skills_hat[1,`j'])/lambda_skills_hat[1,`j'];
};
};


*****************************************************************************;
*****************************************************************************;
*Estimate Investment Function Parameters;
*****************************************************************************;
*****************************************************************************;

forvalues t = 0(1)1{;
local ind_t = `t' + 1; 
forvalues j = 1(1)3{;

*Estimate the "reduced-form" parameters via 2SLS;
qui ivreg2 Z`j'_I`t' ( Z1_skills`t'_tilde = Z2_skills`t'_tilde Z3_skills`t'_tilde ) log_income;

if `j'==1{;
*Recoved measurement parameters of latent investments;
matrix mu_I_hat[`ind_t',`j']=_b[_cons];
matrix lambda_I_hat[`ind_t',`j']=_b[Z1_skills`t'_tilde] + _b[log_income];

*Recover structural parameters of investment equation;
matrix alpha_hat[`ind_t',1]= _b[Z1_skills`t'_tilde]/lambda_I_hat[`ind_t',`j'];
matrix alpha_hat[`ind_t',2]= _b[log_income]/lambda_I_hat[`ind_t',`j'];

*Generate residuals of the investment model (to estimate variance of shocks);
gen res_I`t'_`j' = (Z`j'_I`t' - alpha_hat[`ind_t',1]*Z1_skills`t'_tilde - alpha_hat[`ind_t',2]*log_income)/lambda_I_hat[`ind_t',`j'];
};
else{;
matrix mu_I_hat[`ind_t',`j']=_b[_cons];
matrix lambda_I_hat[`ind_t',`j']=_b[Z1_skills`t'_tilde] + _b[log_income];
};

*Generate residualized measures of latent investments;
gen Z`j'_I`t'_tilde = (Z`j'_I`t' - mu_I_hat[`ind_t',`j'])/lambda_I_hat[`ind_t',`j'];
};
*Estimate the variance of investment shocks;
qui corr res_I`t'_  Z2_I`t'_tilde, cov ;
matrix alpha_hat[`ind_t',3] = sqrt(r(cov_12));
};


*****************************************************************************;
*****************************************************************************;
*Estimate Production Function Parameters;
*****************************************************************************;
*****************************************************************************;

forvalues t = 0(1)1{;
local ind_t = `t' + 1;

*create interaction term in the technology;
gen interaction_skills_inv`t' = Z1_skills`t'_tilde*Z1_I`t'_tilde;

*create instruments for the interaction term in the technology;
gen Z1_interaction_skills_inv`t' =  Z2_skills`t'_tilde*Z2_I`t'_tilde;
gen Z2_interaction_skills_inv`t' =  Z3_skills`t'_tilde*Z3_I`t'_tilde;

*Estimate the "reduced-form" parameters via 2SLS;
qui ivreg2 Z1_skills`ind_t'_tilde ( Z1_skills`t'_tilde Z1_I`t'_tilde  interaction_skills_inv`t' =  Z2_skills`t'_tilde Z3_skills`t'_tilde Z2_I`t'_tilde  Z3_I`t'_tilde Z1_interaction_skills_inv`t' Z2_interaction_skills_inv`t' )  ;

*Recover structural parameters of technology (under age-invariance assumption of measures);
matrix gamma_hat[`ind_t',1]=_b[_cons];
matrix gamma_hat[`ind_t',2]=_b[Z1_skills`t'_tilde];
matrix gamma_hat[`ind_t',3]=_b[Z1_I`t'_tilde];
matrix gamma_hat[`ind_t',4]=_b[interaction_skills_inv`t'];

*Generate residuals of the technology (to estimate variance of shocks);
gen res_skills`ind_t' = (Z1_skills`ind_t'_tilde - gamma_hat[`ind_t',2]*Z1_skills`t'_tilde -  gamma_hat[`ind_t',3]*Z1_I`t'_tilde - gamma_hat[`ind_t',4]*interaction_skills_inv`t' );

*Estimate the variance of technological shocks;
qui corr res_skills`ind_t' Z2_skills`ind_t'_tilde,cov;
matrix gamma_hat[`ind_t',5] = sqrt(r(cov_12));

};


display "True Parameters Investment Equation";
matrix list alpha;
display "Estimated Parameters Investment Equation";
matrix list alpha_hat;


display "True Parameters Technology";
matrix list gamma;
display "Estimated Parameters Technology";
matrix list gamma_hat;
